/*******************************************************************************

This code file finds doppelganger lots for use in the simulated-instruments GMM.

Please modify by hand (at the flagged lines) to switch between the main lot-size threshold of 20% and the alternative thresholds of 10% and 30%.

*******************************************************************************/

*** Manage settings

	* Execute the project settings do-file silently (run suppresses its output).
	* NOTE(review): the path is user-specific (home-directory Dropbox folder);
	* presumably this file defines the code, data and tables globals used
	* below -- confirm against settings.do.
	run "~/Dropbox (MIT)/Research/NYC421a/code/modules/settings.do"
	
*** Load Stata-TeX

	* Execute the Stata-TeX module, then make the estimates table folder the
	* working directory for the rest of the session.
	do "$code/modules/stata-tex.do"
	cd "$tables/estimates"

********************************************************************************
* Simulated instruments GMM
********************************************************************************

*** For each lot, find doppelganger lots

	* Set up tempfiles
	*
	* Both accumulators start life as a one-row placeholder dataset holding a
	* single missing bbl, so that later append commands always have a file to
	* read. record collects matches within a job; master collects across jobs.
	
	tempfile record master
	
	foreach tf in record master {
		clear
		set obs 1
		gen bbl = .
		save ``tf'', replace
	}
	
	* Count the lots in the cleaned data; N bounds the matching loop below
	quietly use "$data/clean/cleaned_data.dta", clear
	local N = _N

	* Obtain simulated instrument using each development
	*
	* For every lot n in the cleaned data: reload the data, overwrite the
	* location, timing and land-assessment variables of all lots with those of
	* lot n, keep only lots that match lot n on borough, zoning district and
	* (within a tolerance) lot area, drop lot n itself, run the tax-rate module,
	* and stack the surviving rows. bbl then identifies lot n, while bbl_match
	* keeps the original BBL of the matched lot.
	
	* FLAGGED LINE: choose the lot-size matching specification here. This one
	* setting drives both the lot-size tolerance and the output file name, so
	* the two can no longer get out of sync.
	*   main = +/- 20% lotsize, saved as matched_buildings.dta
	*   alt1 = +/- 10% lotsize, saved as matched_buildings_alt1.dta
	*   alt2 = +/- 30% lotsize, saved as matched_buildings_alt2.dta
	local spec "alt2"
	
	if "`spec'" == "main" {
		local lot_tol 0.2
		local out_suffix ""
	}
	else if "`spec'" == "alt1" {
		local lot_tol 0.1
		local out_suffix "_alt1"
	}
	else if "`spec'" == "alt2" {
		local lot_tol 0.3
		local out_suffix "_alt2"
	}
	else {
		display as error "spec must be one of: main, alt1, alt2"
		exit 198
	}
	
	* Note: for memory management, I split this into jobs of ~1,000 true buildings.
	* The number of jobs is derived from the number of lots, so no lot is skipped
	* when the data grow and no empty job is run when they shrink.
	local job_size 1000
	local M = ceil(`N'/`job_size')
	
	forvalues n_outer = 1/`M' {
		
		* Loop through ~1,000 observations: e.g., for n_outer = 1, range of obs is 1 - 1000
		
		local first = 1 + `job_size'*(`n_outer'-1)
		local last = min(`N',`job_size'*`n_outer')
		
		forvalues n = `first'/`last' {
		
			* Progress indicator
			di `n'
		
			* Reload data
			quietly use "$data/clean/cleaned_data.dta", clear
			gen key_obs = _n == `n'
			
			* Preserve each lot's own BBL before bbl is overwritten below
			gen double bbl_match = bbl
			format bbl_match %18.0f
						
			* Fix BBL, tract, GEA of all other developments to match the simulated development
			foreach v of varlist bbl ct2010 gea yearpermit nta npp sub_gea gw_gea {
				quietly replace `v' = `v'[`n']
			}
			
			* Swap in the simulated development's land assessment while keeping each
			* lot's own non-land component (assesstot - assessland). The total must be
			* updated first, because it reads the not-yet-overwritten assessland.
			quietly replace assesstot = assessland[`n'] + (assesstot-assessland)
			quietly replace assessland = assessland[`n']

			* Drop other developments that do not match the simulated development on some characteristics
			* The key values are spread to every row through max(x*key_obs) rather
			* than read as x[n], because row positions shift once rows are dropped.
			
				* Match on borough
				gegen borough_key = max(borough*key_obs)
				quietly drop if borough != borough_key
				drop borough_key
				
				* Match on lotsize: the absolute log ratio of lot areas must not
				* exceed the tolerance selected by spec above
				gegen lotarea_key = max(lotarea*key_obs)
				quietly drop if abs(ln(lotarea/lotarea_key)) > `lot_tol'
							
				* Match on zoning district
				gegen zonedist_key = max(zonedist*key_obs)
				quietly drop if zonedist != zonedist_key
				drop zonedist_key
							
			* Drop own observation 
			quietly drop if key_obs == 1
							
			* Calculate the tax rate for each development as if it had the endogenous features of the simulated development
			quietly run "$code/modules/dtaxrate_calc.do"
			
			* Keep BBL and simulated instrument
			quietly keep bbl bbl_match dtaxrate_onsite numfloors unitsres bldgarea builtfar condo assesstot exempttot frac_assess underassess
			
			* Stack this lot's matches onto the job-level accumulator
			quietly append using `record'
			quietly save `record', replace
			
		}
			
			* Memory now holds the full job-level record, as it was just saved
			* at the end of the inner loop. Fold it into the master file.
			append using `master'
			save `master', replace
			
			* Reset the job-level accumulator to its one-row placeholder
			quietly clear
			quietly set obs 1
			quietly gen bbl = .
			quietly save `record', replace
		
	}

	
*** Save matches to file
	
	* NOTE(review): master still contains the all-missing placeholder rows, one
	* from its own initialisation and one per job from record. They are left in
	* place here; confirm that downstream code ignores rows with missing bbl.
	use `master', clear
	
	* File name follows the specification chosen at the flagged line above
	save "$data/raw/matched_buildings`out_suffix'.dta", replace
	
	
